Fragestellung: “Die Mannschaft, die zur Halbzeit vorne liegt, gewinnt mit einer Chance von mindestens 75% auch das Spiel. Falls es zur Halbzeit unentschieden steht, gewinnt eher das Heimteam.”

Dafür nehmen wir den Datacamp Datensatz Soccer Data

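Als Einführung werden wir auf DataCamp folgende Kurse durchgehen:

- Interactive Data Visualization with plotly
- Intermediate Interactive Data Visualization with plotly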

library("plotly")
library("plyr")
library("dplyr")
library("forcats")
library("RColorBrewer")
Warning: Paket ‘RColorBrewer’ wurde unter R Version 4.1.3 erstellt

Daten einlesen und Dataframe erstellen

# Collect the input files from the "Data" folder. Only *.csv files are
# picked up so that stray files (notes, archives, ...) are never handed
# to read.csv().
files <- list.files(
  path = "./Data/",
  pattern = "\\.csv$",
  ignore.case = TRUE,
  full.names = TRUE
)

# Fail early with a clear message instead of erroring later on a missing column
if (length(files) == 0) {
  stop("No CSV files found in ./Data/", call. = FALSE)
}

# Read every CSV and stack the rows into a single data frame
# (the files are expected to cover the years 2015-2019)
df <- ldply(.data = files, .fun = read.csv)

# Open the data viewer only in interactive sessions; skip it when rendering
if (interactive()) {
  View(df)
}
# Tally how often each half-time (HTR) and full-time (FTR) result occurs
df_htr <- count(df, HTR)
df_ftr <- count(df, FTR)
# Result codes as they appear in HTR/FTR, mapped to readable labels
result_labels <- c(A = "Away", D = "Draw", H = "Home")

# Look up each count by its result code rather than by row position, so the
# labels stay correct even if a code is missing or an extra value (e.g. NA)
# shows up in the tallies.
halftime_n <- df_htr$n[match(names(result_labels), df_htr$HTR)]
fulltime_n <- df_ftr$n[match(names(result_labels), df_ftr$FTR)]

# A code that never occurs has a count of zero
halftime_n[is.na(halftime_n)] <- 0L
fulltime_n[is.na(fulltime_n)] <- 0L

# Create dataframe with halftime & fulltime result amounts
df_results <- data.frame(unname(result_labels), halftime_n, fulltime_n)

# Rename column headers
col_headings <- c("Result", "Halftime", "Fulltime")
names(df_results) <- col_headings

# Grouped bar chart comparing how often each result occurs at half time
# and at full time. The x aesthetic has to reference the `Result` column
# of df_results (a non-existent column name makes plotly fail).
fig <- plot_ly(
  df_results,
  x = ~Result,
  y = ~Halftime,
  type = "bar",
  name = "Half Time Score"
) %>%
  add_trace(y = ~Fulltime, name = "Full Time Score") %>%
  layout(yaxis = list(title = "Count"), barmode = "group")

fig
# Combine the half-time and full-time result codes into one "game progress"
# label; paste() joins them with a single space, e.g. "H H"
df[["result"]] <- paste(df[["HTR"]], df[["FTR"]])

print("Example: H H = home team is winning at halftime and also wins the game at fulltime")
[1] "Example: H H = home team is winning at halftime and also wins the game at fulltime"
# Tally each half-time/full-time combination
progress_counts <- count(df, result)

# Order the bars from the most to the least frequent combination
progress_counts <- mutate(
  progress_counts,
  result = fct_reorder(result, n, .desc = TRUE)
)

# Bar chart of the combinations, with the count shown as bar text
plot_ly(
  progress_counts,
  x = ~result,
  y = ~n,
  text = ~n,
  textposition = "auto"
) %>%
  add_bars() %>%
  layout(
    xaxis = list(title = "Game Progress"),
    yaxis = list(title = "Amount"),
    title = "How are the different game progresses distributed?"
  )
#' Percentage of rows in `df2` relative to the rows in `df1`
#'
#' @param df1 Data frame whose row count is the denominator (the base set).
#' @param df2 Data frame whose row count is the numerator (the subset).
#' @return `100 / nrow(df1) * nrow(df2)`, rounded to two decimal places.
calc_prob <- function(df1, df2) {
  base_n <- nrow(df1)
  hit_n <- nrow(df2)
  round(100 / base_n * hit_n, digits = 2)
}
# Games in which the home side is ahead at the break
df_ht_home <- filter(df, HTR == "H")

# Of those, the games the home side also wins at full time
df_ft_home <- filter(df_ht_home, FTR == "H")

# Share (in %) of half-time home leads that end as home wins
home_win_prob <- calc_prob(df_ht_home, df_ft_home)

print(nrow(df_ft_home))
[1] 421
cat(
  "Probability that the home team wins the game if they are leading at half time: ",
  home_win_prob,
  "%"
)
Probability that the home team wins the game if they are leading at half time:  82.55 %
# Games in which the away side is ahead at half time
df_ht_away <- df %>%
  filter(HTR == "A")

# Of those, the games the away side also wins at full time
df_ft_away <- df_ht_away %>%
  filter(FTR == "A")

# Share (in %) of half-time away leads that end as away wins.
# The denominator is df_ht_away (all half-time away leads).
away_win_prob <- calc_prob(df_ht_away, df_ft_away)
print(nrow(df_ft_away))
[1] 273
cat("Probability that the away team wins the game if they are leading at half time: ", away_win_prob, "%")
Probability that the away team wins the game if they are leading at half time:  72.03 %
# Games that are level at half time
df_ht_draw <- df %>%
  filter(HTR == "D")

# Of those, the games that are still level at full time
df_ft_draw <- df_ht_draw %>%
  filter(FTR == "D")

# Share (in %) of half-time draws that also end as a draw
draw_prob <- calc_prob(df_ht_draw, df_ft_draw)

cat("Probability that the game ends in a draw if the halftime result is also a draw: ", draw_prob, "%")
Probability that the game ends in a draw if the halftime result is also a draw:  36.45 %
# Games that are level at half time and end with a home win
df_ht_draw_ft_home_win <- df_ht_draw %>%
  filter(FTR == "H")

# Share (in %) of half-time draws that the home team goes on to win.
# The denominator is df_ht_draw (all games level at half time).
home_win_after_ht_draw_prob <- calc_prob(df_ht_draw, df_ht_draw_ft_home_win)
cat("Probability that the home team wins if the halftime result is a draw: ", home_win_after_ht_draw_prob, "%")
Probability that the home team wins if the halftime result is a draw:  38.03 %
# Probability that the team leading at half time wins the game.
# This is the number of games won by the half-time leader divided by the
# number of games that had a half-time leader. Averaging the home and away
# percentages weighted by the *full-time* win counts would overweight the
# larger percentage; the correct weights are the half-time lead counts,
# which reduces to the ratio below.
n_ht_leads <- nrow(df_ht_home) + nrow(df_ht_away)
n_leader_wins <- nrow(df_ft_home) + nrow(df_ft_away)
ht_ft_win_prob <- round(100 / n_ht_leads * n_leader_wins, digits = 2)

cat("Probability that the team leading at half time wins the entire game: ", ht_ft_win_prob, "%")
Probability that the team leading at half time wins the entire game:  78.07 %
LS0tDQp0aXRsZTogIkRhdGF2aXogbWl0IFBsb3RseSBQTCBEYXRhIg0Kb3V0cHV0OiBodG1sX25vdGVib29rDQotLS0NCg0KIyMgRnJhZ2VzdGVsbHVuZzogIkRpZSBNYW5zY2hhZmZ0LCBkaWUgenVyIEhhbGJ6ZWl0IHZvcm5lIGxpZWd0LCBnZXdpbm50IG1pdCBlaW5lciBDaGFuY2Ugdm9uIG1pbmRlc3RlbnMgNzUlIGF1Y2ggZGFzIFNwaWVsLiBGYWxscyB6dXIgSGFsYnplaXQgdW5lbnRzY2hpZWRlbiBpc3QsIGdld2lubnQgZWhlciBkYXMgSGVpbXRlYW0uIg0KDQoNCkRhZsO8ciBuZWhtZW4gd2lyIGRlbiBEYXRhY2FtcCBEYXRlbnNhdHogW1NvY2NlciBEYXRhXShodHRwczovL2FwcC5kYXRhY2FtcC5jb20vd29ya3NwYWNlL2RhdGFzZXRzL2RhdGFzZXQtcHl0aG9uLXNvY2NlcikNCg0KQWxzIEVpbmbDvGhydW5nIHdlcmRlbiB3aXIgYXVmIERhdGFjYW1wIGZvbGdlbmRlIEt1cnNlIGR1cmNoZ2VoZW46DQoNCi0gW0ludGVyYWN0aXZlIERhdGEgVmlzdWFsaXphdGlvbiB3aXRoIHBsb3RseV0oaHR0cHM6Ly9hcHAuZGF0YWNhbXAuY29tL2xlYXJuL2NvdXJzZXMvaW50ZXJhY3RpdmUtZGF0YS12aXN1YWxpemF0aW9uLXdpdGgtcGxvdGx5LWluLXIpDQoNCi0gW0ludGVybWVkaWF0ZSBJbnRlcmFjdGl2ZSBEYXRhIFZpc3VhbGl6YXRpb24gd2l0aCBwbG90bHldKGh0dHBzOi8vYXBwLmRhdGFjYW1wLmNvbS9sZWFybi9jb3Vyc2VzL2ludGVyYWN0aXZlLWRhdGEtdmlzdWFsaXphdGlvbi13aXRoLXBsb3RseS1pbi1yKQ0KDQpgYGB7cn0NCiMgQmlibGlvdGhla2VuIGltcG9ydGllcmVuDQpsaWJyYXJ5KCJwbG90bHkiKQ0KbGlicmFyeSgicGx5ciIpDQpsaWJyYXJ5KCJkcGx5ciIpDQpsaWJyYXJ5KCJmb3JjYXRzIikNCmxpYnJhcnkoIlJDb2xvckJyZXdlciIpDQpgYGANCiMjIERhdGVuIGVpbmxlc2VuIHVuZCBEYXRhZnJhbWUgZXJzdGVsbGVuDQpgYGB7cn0NCiMgTGlzdCBmaWxlcyBpbiBmb2xkZXIgIkRhdGEiDQpmaWxlcyA8LSBsaXN0LmZpbGVzKHBhdGg9Ii4vRGF0YS8iLCBwYXR0ZXJuPU5VTEwsIGFsbC5maWxlcz1GQUxTRSwgZnVsbC5uYW1lcz1UUlVFKQ0KDQojIENyZWF0ZSBEYXRhZnJhbWUgd2l0aCBhbGwgY3N2IGZyb20geWVhcnMgMjAxNS0yMDE5DQpkZiA8LSBsZHBseSguZGF0YSA9IGZpbGVzLCAuZnVuID0gcmVhZC5jc3YpDQoNClZpZXcoZGYpDQpgYGANCg0KYGBge3J9DQojIENvdW50IGZyZXF1ZW5jeSBvZiBoYWx0aW1lICYgZnVsbHRpbWUgcmVzdWx0cw0KZGZfaHRyIDwtIGRmICU+JSBjb3VudChIVFIpDQpkZl9mdHIgPC0gZGYgJT4lIGNvdW50KEZUUikNCmBgYA0KDQpgYGB7cn0NCiMgQ3JlYXRlIGRhdGFmcmFtZSB3aXRoIGhhbHR0aW1lICYgZnVsbHRpbWUgcmVzdWx0IGFtb3VudHMNCmRmX3Jlc3VsdHMgPC0gZGF0YS5mcmFtZShjKCJBd2F5IiwgIkRyYXciLCAiSG9tZSIpLCBjKGRmX2h0ciRuKSwgYyhkZl9mdHIkbikpDQoNCiMgUmVuYW1lIGNvbHVtbiBoZWFkZXJzDQpjb2xfaGVhZGluZ3MgPC0gYygnUmVzdWx0
JywnSGFsZnRpbWUnLCdGdWxsdGltZScpDQpuYW1lcyhkZl9yZXN1bHRzKSA8LSBjb2xfaGVhZGluZ3MNCg0KIyBQbG90IGdyb3VwZWQgYmFyIGNoYXJ0IHRvIHZpc3VhbGl6ZSBoYWxmdGltZSAmIGZ1bGx0aW1lIHJlc3VsdHMNCmZpZyA8LSBwbG90X2x5KA0KICBkZl9yZXN1bHRzLCB4ID0gflJlc3VsdHMsIHkgPSB+SGFsZnRpbWUsIHR5cGUgPSAnYmFyJywgbmFtZSA9ICdIYWxmIFRpbWUgU2NvcmUnKSAlPiUgDQogIGFkZF90cmFjZSh5ID0gfkZ1bGx0aW1lLCBuYW1lID0gJ0Z1bGwgVGltZSBTY29yZScpICU+JQ0KICBsYXlvdXQoeWF4aXMgPSBsaXN0KHRpdGxlID0gJ0NvdW50JyksIGJhcm1vZGUgPSAnZ3JvdXAnKQ0KDQpmaWcNCmBgYA0KYGBge3J9DQojIG1lcmdlIEhUUiAmIEZUUiB0byAxIGNvbHVtbg0KZGYkcmVzdWx0IDwtIHBhc3RlKGRmJEhUUiwgZGYkRlRSKQ0KDQpwcmludCgiRXhhbXBsZTogSCBIID0gaG9tZSB0ZWFtIGlzIHdpbm5pbmcgYXQgaGFsZnRpbWUgYW5kIGFsc28gd2lucyB0aGUgZ2FtZSBhdCBmdWxsdGltZSIpDQpgYGANCg0KYGBge3J9DQojIFBsb3QgYWxsIGRpZmZlcmVudCBnYW1lIHByb2dyZXNzZXMgYW5kIHRoZWlyIGFtb3VudA0KZGYgJT4lDQogIGNvdW50KHJlc3VsdCkgJT4lDQogIG11dGF0ZShyZXN1bHQgPSBmY3RfcmVvcmRlcihyZXN1bHQsIG4sIC5kZXNjID0gVFJVRSkpICU+JQ0KICBwbG90X2x5KHggPSB+cmVzdWx0LCB5ID0gfm4sIHRleHQgPSB+biwgdGV4dHBvc2l0aW9uID0gJ2F1dG8nKSAlPiUNCiAgYWRkX2JhcnMoKSAlPiUNCiAgbGF5b3V0KHhheGlzID0gbGlzdCh0aXRsZSA9ICJHYW1lIFByb2dyZXNzIiksDQogICAgICAgICB5YXhpcyA9IGxpc3QodGl0bGUgPSAiQW1vdW50IiksDQogICAgICAgICB0aXRsZSA9ICJIb3cgYXJlIHRoZSBkaWZmZXJlbnQgZ2FtZSBwcm9ncmVzc2VzIGRpc3RyaWJ1dGVkPyIpDQpgYGANCmBgYHtyfQ0KIyBHcm91cCBieSBnYW1lIG91dGNvbWUgJiBjYWxjdWxhdGUgcHJvYmFiaWxpdHkgb2YgYWxsIG91dGNvbWVzDQpkZl9jb3VudF9yZXN1bHRzIDwtIGRmICU+JSANCiAgZ3JvdXBfYnkocmVzdWx0KSAlPiUgDQogIHN1bW1hcmlzZShjb3VudF9yZXN1bHQgPSByb3VuZChuKCkgLyBucm93KGRmKSAqIDEwMCwgZGlnaXRzID0gMikpDQoNCmRmX2NvdW50X3Jlc3VsdHMgJT4lDQogIHBsb3RfbHkobGFiZWxzID0gfnJlc3VsdCwgdmFsdWVzID0gfmNvdW50X3Jlc3VsdCkgJT4lDQogIGFkZF9waWUoaG9sZSA9IDAuNCwgY29sb3IgPSBJKCJ3aGl0ZSIpKSAlPiUNCiAgbGF5b3V0KHhheGlzID0gbGlzdCh0aXRsZSA9ICJHYW1lIFByb2dyZXNzIiksDQogICAgICAgICB5YXhpcyA9IGxpc3QodGl0bGUgPSAiUHJvYmFiaWxpdHkgJSIpLA0KICAgICAgICAgdGl0bGUgPSAiV2hhdCBpcyB0aGUgcHJvYmFiaWxpdHkgb2YgZWFjaCBnYW1lIHByb2dyZXNzPyIpDQpgYGANCg0KYGBge3J9DQojIENhbGN1bGF0ZSBwcm9iYWJpbGl0eSANCmNhbGNfcHJvYiA8LSBmdW5j
dGlvbihkZjEsIGRmMikgew0KICBwcm9iIDwtIHJvdW5kKCgxMDAgLyBucm93KGRmMSkgKiBucm93KGRmMikpLCBkaWdpdHMgPSAyKQ0KICByZXR1cm4ocHJvYikNCn0NCmBgYA0KDQpgYGB7cn0NCiMgRmlsdGVyIGhvbWUgdGVhbXMgd2lubmluZyBhdCBoYWxmdGltZQ0KZGZfaHRfaG9tZSA8LSBkZiAlPiUgDQogIGZpbHRlcihIVFIgPT0gIkgiKQ0KDQojIEZpbHRlciBob21lIHRlYW1zIHdpbm5pbmcgYXQgaGFsZnRpbWUgJiBmdWxsdGltZQ0KZGZfZnRfaG9tZSA8LSBkZl9odF9ob21lICU+JSANCiAgZmlsdGVyKEZUUiA9PSAiSCIpDQoNCmhvbWVfd2luX3Byb2IgPC0gY2FsY19wcm9iKGRmX2h0X2hvbWUsIGRmX2Z0X2hvbWUpDQoNCmNhdCgiUHJvYmFiaWxpdHkgdGhhdCB0aGUgaG9tZSB0ZWFtIHdpbnMgdGhlIGdhbWUgaWYgdGhleSBhcmUgbGVhZGluZyBhdCBoYWxmIHRpbWU6ICIsIGhvbWVfd2luX3Byb2IsICIlIikNCmBgYA0KDQpgYGB7cn0NCiMgRmlsdGVyIGF3YXkgdGVhbXMgd2lubmluZyBhdCBoYWxmdGltZQ0KZGZfaHRfYXdheSA8LSBkZiAlPiUgDQogIGZpbHRlcihIVFIgPT0gIkEiKQ0KDQojIEZpbHRlciBhd2F5IHRlYW1zIHdpbm5pbmcgYXQgaGFsZnRpbWUgJiBmdWxsdGltZQ0KZGZfZnRfYXdheSA8LSBkZl9odF9hd2F5ICU+JSANCiAgZmlsdGVyKEZUUiA9PSAiQSIpDQoNCmF3YXlfd2luX3Byb2IgPC0gY2FsY19wcm9iKGRmX2hmX2F3YXksIGRmX2Z0X2F3YXkpDQoNCmNhdCgiUHJvYmFiaWxpdHkgdGhhdCB0aGUgYXdheSB0ZWFtIHdpbnMgdGhlIGdhbWUgaWYgdGhleSBhcmUgbGVhZGluZyBhdCBoYWxmIHRpbWU6ICIsIGF3YXlfd2luX3Byb2IsICIlIikNCmBgYA0KDQpgYGB7cn0NCiMgRmlsdGVyIGRyYXcgYXQgaGFsZnRpbWUNCmRmX2h0X2RyYXcgPC0gZGYgJT4lIA0KICBmaWx0ZXIoSFRSID09ICJEIikNCg0KIyBGaWx0ZXIgZHJhdyBhdCBoYWxmdGltZSAmIGZ1bGx0aW1lDQpkZl9mdF9kcmF3IDwtIGRmX2h0X2RyYXcgJT4lIA0KICBmaWx0ZXIoRlRSID09ICJEIikNCg0KZHJhd19wcm9iIDwtIGNhbGNfcHJvYihkZl9odF9kcmF3LCBkZl9mdF9kcmF3KQ0KDQpjYXQoIlByb2JhYmlsaXR5IHRoYXQgdGhlIGdhbWUgZW5kcyBpbiBhIGRyYXcgaWYgdGhlIGhhbGZ0aW1lIHJlc3VsdCBpcyBhbHNvIGEgZHJhdzogIiwgZHJhd19wcm9iLCAiJSIpDQpgYGANCg0KYGBge3J9DQojIEZpbHRlciBkcmF3IGF0IGhhbGZ0aW1lICYgdGhlIGhvbWUgdGVhbSB3aW5uaW5nIGF0IGZ1bGx0aW1lDQpkZl9odF9kcmF3X2Z0X2hvbWVfd2luIDwtIGRmX2h0X2RyYXcgJT4lDQogIGZpbHRlcihGVFIgPT0gIkgiKQ0KDQpob21lX3dpbl9hZnRlcl9odF9kcmF3X3Byb2IgPC0gY2FsY19wcm9iKGRmX2hmX2RyYXcsIGRmX2h0X2RyYXdfZnRfaG9tZV93aW4pDQoNCmNhdCgiUHJvYmFiaWxpdHkgdGhhdCB0aGUgaG9tZSB0ZWFtIHdpbnMgaWYgdGhlIGhhbGZ0aW1lIHJlc3VsdCBpcyBhIGRyYXc6ICIsIGhvbWVfd2luX2FmdGVyX2h0X2Ry
YXdfcHJvYiwgIiUiKQ0KYGBgDQoNCmBgYHtyfQ0KIyBQcm9iYWJpbGl0eSB0aGF0IHRoZSB0ZWFtIHdpbm5pbmcgYXQgaGFsZiB0aW1lIHdpbnMgdGhlIGdhbWUNCmh0X2Z0X3dpbl9wcm9iIDwtIHJvdW5kKCgoaG9tZV93aW5fcHJvYiAqIG5yb3coZGZfZnRfaG9tZSkpICsgKGF3YXlfd2luX3Byb2IgKiBucm93KGRmX2Z0X2F3YXkpKSkgLyAobnJvdyhkZl9mdF9ob21lKSArIG5yb3coZGZfZnRfYXdheSkpLCBkaWdpdHMgPSAyKQ0KDQpjYXQoIlByb2JhYmlsaXR5IHRoYXQgdGhlIHRlYW0gbGVhZGluZyBhdCBoYWxmIHRpbWUgd2lucyB0aGUgZW50aXJlIGdhbWU6ICIsIGh0X2Z0X3dpbl9wcm9iLCAiJSIpDQpgYGANCg0KYGBge3J9DQoNCmBgYA0KDQo=